# input
tse_file <- here::here("data", "processed", "tse_cleaned.rds")
igc_file <- here::here("data", "raw", "dataTable.rds")
# output
out_dir <- here::here("data", "02_alpha_diversity")
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)02 - Alpha Diversity Analysis
1 Define the input and output paths
2 Set-up
# load libraries
library(magrittr)
library(patchwork)
suppressPackageStartupMessages(library(tidySingleCellExperiment))3 Computate Alpha Diversity Metrics
3.1 Load TreeSE
# load
tse <- readr::read_rds(tse_file)3.2 Add Gene Richness
# load
igc_df <- readr::read_rds(igc_file) %>% tibble::as_tibble()
# threshold
threshold <-
igc_df %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(NumberMappedReads = max(NumberMappedReads)) %>%
dplyr::summarise(q = quantile(NumberMappedReads, 0.02)) %>%
dplyr::pull()
# extract
igc_df <-
igc_df %>%
dplyr::filter(ReadCountReal >= threshold) %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(gene_richness = min(GeneNumber))
# merge
tse <- tse %>% dplyr::left_join(igc_df, by = "SampleID")Loading required namespace: TreeSummarizedExperiment
3.3 Compute Alpha Diversity Metrics
# select metrics
metrics <- c(
"shannon_diversity", "gini_dominance","observed_richness", "pielou_evenness"
)
# compute
tse <-
tse %>%
mia::addAlpha(
index = metrics,
sample = quantile(colSums(assay(tse, "counts")), 0.02),
niter = 10,
BPPARAM = BiocParallel::MulticoreParam()
)4 Plot Alpha Diversity Metrics
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map( ~ {
# prepare data
plt_df <- colData(tse) %>% tibble::as_tibble()
# calculate stats
stats_2 <-
plt_df %>%
rstatix::group_by(treatment) %>%
rstatix::wilcox_test(
formula = formula(glue::glue("{.x} ~ time_point")),
p.adjust.method = "fdr"
) %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "time_point", group = "treatment") %>%
dplyr::mutate(y.position = y.position)
# plot
plt <-
plt_df %>%
tidyr::drop_na(!!dplyr::sym(.x)) %>%
tidyplots::tidyplot(x = time_point, y = !!dplyr::sym(.x), colour = treatment) %>%
tidyplots::add_boxplot(show_outliers = FALSE) %>%
tidyplots::add_data_points_jitter(alpha = 0.4) %>%
tidyplots::add_test_asterisks(
method = "wilcox_test", hide_info = TRUE, bracket.nudge.y = 0.05, tip.length = 0.01
) %>%
tidyplots::add(ggpubr::stat_pvalue_manual(
stats_2, label = "p.adj.signif", hide.ns = TRUE, tip.length = 0.01,
)) %>%
tidyplots::adjust_x_axis_title("Time Point (weeks)") %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::adjust_y_axis_title(
.x %>% stringr::str_replace_all("_", " ") %>% stringr::str_to_title()
) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly)
if (.x == "gene_richness") {
plt <- plt %>% tidyplots::adjust_y_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 40, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(out_dir, glue::glue("alpha_{.x}.pdf")),
view_plot = FALSE
)
plt
})5 Compute Correlations
5.1 Define Cytokines and Markers
populations <- c("CD4", "CD8", "CD4_nadir", "CD4_CD38_DR", "CD8_CD38_DR")
cytokines <- c("CRP", "IL6", "TNFa", "sCD14")
others <- c("HIV_VL", "BMI")
all_markers <- c(populations, cytokines, others)5.2 Cytokines Log2 Transformation
tse2 <- tse %>% dplyr::mutate(dplyr::across(dplyr::all_of(cytokines), ~ log2(.x + 1)))5.3 Compute Correlations
corr_df <-
mia::getCrossAssociation(
tse2,
tse2,
col.var1 = c(metrics, "gene_richness"),
col.var2 = all_markers,
method = "spearman",
p_adj_method = "fdr",
test.signif = TRUE,
verbose = FALSE,
show_warnings = FALSE
)5.4 Plot Global Correlations
# prepare data
plt_df <- corr_df %>% dplyr::mutate(sign = p_adj < 0.05 & abs(cor) >= 0.2)
# plot
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1) * 1.05
plt <-
plt_df %>%
dplyr::mutate(lab = stringr::str_replace(Var1, "_", " ") %>% stringr::str_to_sentence()) %>%
ggplot(aes(x = Var2, y = lab, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("TRUE" = "black", "FALSE" = "white")) +
scale_alpha_manual(values = c("TRUE" = 0.9, "FALSE" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "", y = "", fill = "Spearman Correlation", colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
r_plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(75, 1), "mm"),
heights = ggplot2::unit(40, "mm")
)
tidyplots::save_plot(
r_plt,
here::here(out_dir, glue::glue("all_correlation.pdf")),
view_plot = FALSE
)
r_plt6 Compute Correlations by Treatment Group
6.1 Compute Correlations
corr_df <-
mia::splitOn(tse2, "treatment") %>%
purrr::map_dfr(~ {
mia::getCrossAssociation(
.x,
.x,
col.var1 = c(metrics, "gene_richness"),
col.var2 = all_markers,
method = "spearman",
p_adj_method = "fdr",
test.signif = TRUE,
verbose = FALSE,
show_warnings = FALSE
) %>%
dplyr::mutate(treatment = unique(.x$treatment))
})6.2 Plot Global Correlations
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(~ {
# prepare data
plt_df <-
corr_df %>%
dplyr::filter(Var1 == .x) %>%
dplyr::mutate(sign = p_adj < 0.05 & abs(cor) >= 0.2)
# plot
.name <- stringr::str_replace(.x, "_", " ") %>% stringr::str_to_title()
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1)
plt <-
plt_df %>%
ggplot(aes(x = Var2, y = treatment, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("TRUE" = "black", "FALSE" = "white")) +
scale_alpha_manual(values = c("TRUE" = 0.9, "FALSE" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "",
y = "Treatment",
fill = glue::glue("Spearman Correlation\n(Mark. vs. {.name})"),
colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(70, 1), "mm"),
heights = ggplot2::unit(30, "mm")
)
tidyplots::save_plot(
plt,
here::here(out_dir, glue::glue("correlation_{.x}.pdf")),
view_plot = FALSE
)
plt
})6.3 Plot Scatter Correlations
plt_df <- colData(tse2) %>% tibble::as_tibble()
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(function(.metric) {
all_markers %>%
purrr::set_names() %>%
purrr::map(function(.marker) {
# prepare data
marker_values <- plt_df[[.marker]] %>% .[is.finite(.)]
lab_min <- min(marker_values, na.rm = TRUE)
lab_max <- max(marker_values, na.rm = TRUE)
lab_range <- lab_max - lab_min
.name <- stringr::str_replace(.metric, "_", " ") %>% stringr::str_to_title()
.y_lab <- dplyr::if_else(
.marker %in% cytokines, glue::glue("Log2 {.marker}"), .marker
)
plt <-
plt_df %>%
tidyr::drop_na(!!.metric, !!.marker) %>%
tidyplots::tidyplot(
x = !!dplyr::sym(.metric),
y = !!dplyr::sym(.marker),
colour = treatment
) %>%
tidyplots::add_data_points(alpha = 0.5) %>%
tidyplots::add(geom_smooth(method = "lm", alpha = 0.1, formula = 'y ~ x')) %>%
tidyplots::add(ggpubr::stat_cor(
method = "spearman",
cor.coef.name = "rho",
label.y.npc = "bottom",
p.digits = 1,
label.y = c(lab_min - 0.05 * lab_range, lab_min - 0.15 * lab_range),
size = 3
)) %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::adjust_y_axis_title(.y_lab) %>%
tidyplots::adjust_x_axis_title(.name) %>%
tidyplots::adjust_x_axis(rotate_labels = 30) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly)
if (.metric == "gene_richness") {
plt <- plt %>% tidyplots::adjust_x_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 50, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(out_dir, glue::glue("corr_{.metric}_{.marker}.pdf")),
view_plot = FALSE
)
plt
})
})7 Export TSE with Alpha Metrics
tse %>% readr::write_rds(here::here("data", "processed", "tse_alpha.rds"))8 Appendix
8.1 Session Info
devtools::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.0 alpha (2025-03-25 r88054)
os Ubuntu 22.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Madrid
date 2025-03-28
pandoc 3.2 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.5.57 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] RSPM (R 4.5.0)
ape 5.8-1 2024-12-16 [1] RSPM
backports 1.5.0 2024-05-23 [1] RSPM (R 4.5.0)
beachmat 2.23.7 2025-03-17 [1] Bioconduc~
beeswarm 0.4.0 2021-06-01 [1] RSPM
Biobase * 2.67.0 2024-10-29 [1] Bioconduc~
BiocBaseUtils 1.9.0 2024-10-29 [1] Bioconduc~
BiocGenerics * 0.53.6 2025-01-27 [1] Bioconduc~
BiocNeighbors 2.1.3 2025-03-05 [1] Bioconduc~
BiocParallel 1.41.2 2025-02-19 [1] Bioconduc~
BiocSingular 1.23.0 2024-10-29 [1] Bioconduc~
Biostrings 2.75.4 2025-02-21 [1] Bioconduc~
bluster 1.17.0 2024-10-29 [1] Bioconduc~
broom 1.0.7 2024-09-26 [1] RSPM (R 4.5.0)
cachem 1.1.0 2024-05-16 [1] RSPM (R 4.5.0)
car 3.1-3 2024-09-27 [1] RSPM (R 4.5.0)
carData 3.0-5 2022-01-06 [1] RSPM (R 4.5.0)
cellranger 1.1.0 2016-07-27 [1] RSPM
cli 3.6.4 2025-02-13 [1] RSPM (R 4.5.0)
cluster 2.1.8.1 2025-03-12 [2] CRAN (R 4.5.0)
coda 0.19-4.1 2024-01-31 [1] RSPM (R 4.5.0)
codetools 0.2-20 2024-03-31 [2] CRAN (R 4.5.0)
colorspace 2.1-1 2024-07-26 [1] RSPM (R 4.5.0)
crayon 1.5.3 2024-06-20 [1] RSPM (R 4.5.0)
data.table 1.17.0 2025-02-22 [1] RSPM (R 4.5.0)
DBI 1.2.3 2024-06-02 [1] RSPM
DECIPHER 3.3.4 2025-03-11 [1] Bioconduc~
decontam 1.27.0 2024-10-29 [1] Bioconduc~
DelayedArray 0.33.6 2025-02-14 [1] Bioconduc~
DelayedMatrixStats 1.29.1 2025-01-09 [1] Bioconduc~
devtools 2.4.5 2022-10-11 [1] RSPM
digest 0.6.37 2024-08-19 [1] RSPM (R 4.5.0)
DirichletMultinomial 1.49.0 2024-10-29 [1] Bioconduc~
dplyr * 1.1.4 2023-11-17 [1] RSPM (R 4.5.0)
ellipsis 0.3.2 2021-04-29 [1] RSPM
emmeans 1.11.0 2025-03-20 [1] RSPM
estimability 1.5.1 2024-05-12 [1] RSPM
evaluate 1.0.3 2025-01-10 [1] RSPM (R 4.5.0)
fansi 1.0.6 2023-12-08 [1] RSPM (R 4.5.0)
farver 2.1.2 2024-05-13 [1] RSPM (R 4.5.0)
fastmap 1.2.0 2024-05-15 [1] RSPM (R 4.5.0)
fillpattern 1.0.2 2024-06-24 [1] RSPM
Formula 1.2-5 2023-02-24 [1] RSPM (R 4.5.0)
fs 1.6.5 2024-10-30 [1] RSPM (R 4.5.0)
generics * 0.1.3 2022-07-05 [1] RSPM (R 4.5.0)
GenomeInfoDb * 1.43.4 2025-01-24 [1] Bioconduc~
GenomeInfoDbData 1.2.14 2025-03-27 [1] Bioconductor
GenomicRanges * 1.59.1 2024-11-15 [1] Bioconduc~
ggbeeswarm 0.7.2 2023-04-29 [1] RSPM
ggnewscale 0.5.1 2025-02-24 [1] RSPM
ggplot2 * 3.5.1 2024-04-23 [1] RSPM (R 4.5.0)
ggpubr 0.6.0 2023-02-10 [1] RSPM
ggrepel 0.9.6 2024-09-07 [1] RSPM (R 4.5.0)
ggsignif 0.6.4 2022-10-13 [1] RSPM (R 4.5.0)
ggtext 0.1.2 2022-09-16 [1] RSPM
glue 1.8.0 2024-09-30 [1] RSPM (R 4.5.0)
gridExtra 2.3 2017-09-09 [1] RSPM
gridtext 0.1.5 2022-09-16 [1] RSPM
gtable 0.3.6 2024-10-25 [1] RSPM (R 4.5.0)
here 1.0.1 2020-12-13 [1] RSPM (R 4.5.0)
hms 1.1.3 2023-03-21 [1] RSPM (R 4.5.0)
htmltools 0.5.8.1 2024-04-04 [1] RSPM (R 4.5.0)
htmlwidgets 1.6.4 2023-12-06 [1] RSPM (R 4.5.0)
httpuv 1.6.15 2024-03-26 [1] RSPM
httr 1.4.7 2023-08-15 [1] CRAN (R 4.5.0)
igraph 2.1.4 2025-01-23 [1] RSPM
IRanges * 2.41.3 2025-02-12 [1] Bioconduc~
irlba 2.3.5.1 2022-10-03 [1] RSPM
jsonlite 1.9.1 2025-03-03 [1] RSPM (R 4.5.0)
knitr 1.50 2025-03-16 [1] RSPM
labeling 0.4.3 2023-08-29 [1] RSPM (R 4.5.0)
later 1.4.1 2024-11-27 [1] RSPM (R 4.5.0)
lattice 0.22-6 2024-03-20 [2] CRAN (R 4.5.0)
lazyeval 0.2.2 2019-03-15 [1] RSPM (R 4.5.0)
lifecycle 1.0.4 2023-11-07 [1] RSPM (R 4.5.0)
magrittr * 2.0.3 2022-03-30 [1] RSPM (R 4.5.0)
MASS 7.3-65 2025-02-28 [2] CRAN (R 4.5.0)
Matrix 1.7-3 2025-03-11 [2] CRAN (R 4.5.0)
MatrixGenerics * 1.19.1 2025-01-09 [1] Bioconduc~
matrixStats * 1.5.0 2025-01-07 [1] RSPM
memoise 2.0.1 2021-11-26 [1] RSPM (R 4.5.0)
mgcv 1.9-1 2023-12-21 [2] CRAN (R 4.5.0)
mia 1.15.32 2025-03-17 [1] Bioconduc~
mime 0.13 2025-03-17 [1] RSPM (R 4.5.0)
miniUI 0.1.1.1 2018-05-18 [1] RSPM
multcomp 1.4-28 2025-01-29 [1] RSPM
MultiAssayExperiment 1.33.9 2025-01-29 [1] Bioconduc~
munsell 0.5.1 2024-04-01 [1] RSPM (R 4.5.0)
mvtnorm 1.3-3 2025-01-10 [1] RSPM (R 4.5.0)
nlme 3.1-167 2025-01-27 [2] CRAN (R 4.5.0)
parallelly 1.43.0 2025-03-24 [1] RSPM
patchwork * 1.3.0 2024-09-16 [1] RSPM (R 4.5.0)
permute 0.9-7 2022-01-27 [1] RSPM (R 4.5.0)
pillar 1.10.1 2025-01-07 [1] RSPM (R 4.5.0)
pkgbuild 1.4.7 2025-03-24 [1] RSPM
pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.5.0)
pkgload 1.4.0 2024-06-28 [1] RSPM
plotly 4.10.4 2024-01-13 [1] RSPM (R 4.5.0)
plyr 1.8.9 2023-10-02 [1] RSPM (R 4.5.0)
profvis 0.4.0 2024-09-20 [1] RSPM
promises 1.3.2 2024-11-28 [1] RSPM (R 4.5.0)
purrr 1.0.4 2025-02-05 [1] RSPM
R6 2.6.1 2025-02-15 [1] RSPM (R 4.5.0)
ragg 1.3.3 2024-09-11 [1] RSPM
rbiom 2.1.2 2025-03-18 [1] RSPM
RColorBrewer 1.1-3 2022-04-03 [1] RSPM (R 4.5.0)
Rcpp 1.0.14 2025-01-12 [1] RSPM (R 4.5.0)
readr 2.1.5 2024-01-10 [1] RSPM
readxl 1.4.5 2025-03-07 [1] RSPM
remotes 2.5.0 2024-03-17 [1] RSPM
reshape2 1.4.4 2020-04-09 [1] RSPM (R 4.5.0)
rhdf5 2.51.2 2025-01-08 [1] Bioconduc~
rhdf5filters 1.19.2 2025-03-05 [1] Bioconduc~
Rhdf5lib 1.29.2 2025-03-24 [1] Bioconduc~
rlang 1.1.5 2025-01-17 [1] RSPM (R 4.5.0)
rmarkdown 2.29 2024-11-04 [1] RSPM
rprojroot 2.0.4 2023-11-05 [1] RSPM (R 4.5.0)
rstatix 0.7.2 2023-02-01 [1] RSPM
rstudioapi 0.17.1 2024-10-22 [1] RSPM (R 4.5.0)
rsvd 1.0.5 2021-04-16 [1] RSPM
S4Arrays 1.7.3 2025-02-09 [1] Bioconduc~
S4Vectors * 0.45.4 2025-02-11 [1] Bioconduc~
sandwich 3.1-1 2024-09-15 [1] RSPM
ScaledMatrix 1.15.0 2024-10-29 [1] Bioconduc~
scales 1.3.0 2023-11-28 [1] RSPM (R 4.5.0)
scater 1.35.4 2025-03-07 [1] Bioconduc~
scuttle 1.17.0 2024-10-29 [1] Bioconduc~
sessioninfo 1.2.3 2025-02-05 [1] RSPM
shiny 1.10.0 2024-12-14 [1] RSPM
SingleCellExperiment * 1.29.2 2025-03-07 [1] Bioconduc~
slam 0.1-55 2024-11-13 [1] RSPM
SparseArray 1.7.7 2025-03-19 [1] Bioconduc~
sparseMatrixStats 1.19.0 2024-10-29 [1] Bioconduc~
stringi 1.8.4 2024-05-06 [1] RSPM (R 4.5.0)
stringr 1.5.1 2023-11-14 [1] RSPM (R 4.5.0)
SummarizedExperiment * 1.37.0 2024-10-29 [1] Bioconduc~
survival 3.8-3 2024-12-17 [2] CRAN (R 4.5.0)
systemfonts 1.2.1 2025-01-20 [1] RSPM
textshaping 1.0.0 2025-01-20 [1] RSPM
TH.data 1.1-3 2025-01-17 [1] RSPM
tibble 3.2.1 2023-03-20 [1] RSPM (R 4.5.0)
tidyplots 0.2.2 2025-03-07 [1] RSPM
tidyr * 1.3.1 2024-01-24 [1] RSPM (R 4.5.0)
tidyselect 1.2.1 2024-03-11 [1] RSPM (R 4.5.0)
tidySingleCellExperiment * 1.17.0 2024-10-29 [1] Bioconductor 3.21 (R 4.5.0)
tidytree 0.4.6 2023-12-12 [1] RSPM
treeio 1.31.0 2024-10-29 [1] Bioconduc~
TreeSummarizedExperiment 2.15.0 2024-10-29 [1] Bioconduc~
ttservice * 0.4.1 2024-06-07 [1] RSPM (R 4.5.0)
tzdb 0.5.0 2025-03-15 [1] RSPM (R 4.5.0)
UCSC.utils 1.3.1 2025-01-15 [1] Bioconduc~
urlchecker 1.0.1 2021-11-30 [1] RSPM
usethis 3.1.0 2024-11-26 [1] RSPM
vctrs 0.6.5 2023-12-01 [1] RSPM (R 4.5.0)
vegan 2.6-10 2025-01-29 [1] RSPM (R 4.5.0)
vipor 0.4.7 2023-12-18 [1] RSPM
viridis 0.6.5 2024-01-29 [1] RSPM
viridisLite 0.4.2 2023-05-02 [1] RSPM (R 4.5.0)
withr 3.0.2 2024-10-28 [1] RSPM (R 4.5.0)
xfun 0.51 2025-02-19 [1] RSPM
xml2 1.3.8 2025-03-14 [1] CRAN (R 4.5.0)
xtable 1.8-4 2019-04-21 [1] RSPM
XVector 0.47.2 2025-01-08 [1] Bioconduc~
yaml 2.3.10 2024-07-26 [1] RSPM
yulab.utils 0.2.0 2025-01-29 [1] RSPM
zoo 1.8-13 2025-02-22 [1] RSPM
[1] /home/fcatala/R/x86_64-pc-linux-gnu-library/4.5
[2] /opt/R/next/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────
8.2 Contact
- Analysis Lead: Francesc Català-Moll
- Email: fcatala@irsicaixa.es
- Institution: GEM - IrsiCaixa